# -*- coding: utf8 -*-
import logging
import re
import common

# Site identifier and match keyword for this scraper module.
# NOTE(review): presumably read by the surrounding project to select this
# handler for a given URL - confirm against the callers.
site_index = site_keyword = 'sing365'

# Root address of the lyrics site.
site_url = 'http://www.sing365.com/'

# Sample lyric page, used when this module is run directly as a script.
test_url = site_url + (
    'music/lyric.nsf/'
    'We-Belong-Together-lyrics-Mariah-Carey/'
    '12371122B3C05FE948256FBE00285AEB'
)

def _slice_between(text, begin_marker, end_marker):
    """Return the part of *text* strictly between two literal markers.

    The end marker is only searched for after the end of the begin marker.
    Returns None when either marker cannot be found.
    """
    start = text.find(begin_marker)
    if start == -1:
        return None
    start += len(begin_marker)

    stop = text.find(end_marker, start)
    if stop == -1:
        return None

    return text[start:stop]


def get_lyric(url):
    """Build a plain-text lyric from the page at *url*.

    The page content is obtained through ``common.get_url_content`` and
    decoded as UTF-8, silently dropping undecodable bytes.  Artist and title
    are read from the ``tf_artist`` / ``tf_song`` assignments embedded in the
    page, and the lyric body is the text between two fixed HTML markers with
    every ``<br>`` tag removed.

    Args:
        url: Address of the lyric page.

    Returns:
        A unicode string laid out as ``<title>``, blank line,
        ``Artist: <artist>``, two blank lines, then the lyric text; or the
        literal string ``'Pattern Not Found'`` when either HTML marker is
        missing from the page.
    """
    encoding = 'utf8'

    # Pass arguments to logging instead of pre-formatting, so the message is
    # only built when DEBUG output is actually enabled.
    logging.debug('url [%s]', url)

    content = common.get_url_content(url)
    html = content.decode(encoding, 'ignore')

    # Non-greedy groups: stop at the first closing '";' so that a second
    # assignment on the same line is not swallowed into the captured value.
    # NOTE(review): assumes common.get_first_group_by_pattern returns the
    # first capture group of a regex search over html - confirm in common.
    artist = common.get_first_group_by_pattern(html, r'tf_artist = "(.*?)";')
    title = common.get_first_group_by_pattern(html, r'tf_song = "(.*?)";')

    logging.debug('artist [%s], title [%s]', artist, title)

    raw_lyric = _slice_between(
        html,
        'border=0><br><br></div>',
        '<div align="center"><br><br><img',
    )
    if raw_lyric is None:
        # Kept as a sentinel string (not an exception) for existing callers.
        return 'Pattern Not Found'

    # Only the <br> tags are dropped; the remaining text is kept as-is.
    body = raw_lyric.replace('<br>', '').strip()

    return u'%s\n\nArtist: %s\n\n\n%s' % (title, artist, body)

if __name__ == '__main__':
    # Manual smoke test: fetch the sample page and dump the parsed result
    # with debug logging switched on.
    logging.basicConfig(level=logging.DEBUG)
    url = test_url
    # A parenthesised single-argument print prints the same as the Python 2
    # print statement and is also valid syntax on Python 3.
    print(url)
    lyric = get_lyric(url)
    # repr() keeps newlines and non-ASCII characters visible in the output.
    print(repr(lyric))
